Data Science Project - Dubai Real Estate Goldmine, UAE Rental Market Data¶

  • This dataset can be found on Kaggle : https://www.kaggle.com/datasets/azharsaleem/real-estate-goldmine-dubai-uae-rental-market
In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px 
In [2]:
df = pd.read_csv("dubai_Rent.csv")
In [3]:
df.head(3)
Out[3]:
Address Rent Beds Baths Type Area_in_sqft Rent_per_sqft Rent_category Frequency Furnishing Purpose Posted_date Age_of_listing_in_days Location City Latitude Longitude
0 The Gate Tower 2, The Gate Tower, Shams Gate D... 124000 3 4 Apartment 1785 69.467787 Medium Yearly Unfurnished For Rent 2024-03-07 45 Al Reem Island Abu Dhabi 24.493598 54.407841
1 Water's Edge, Yas Island, Abu Dhabi 140000 3 4 Apartment 1422 98.452883 Medium Yearly Unfurnished For Rent 2024-03-08 44 Yas Island Abu Dhabi 24.494022 54.607372
2 Al Raha Lofts, Al Raha Beach, Abu Dhabi 99000 2 3 Apartment 1314 75.342466 Medium Yearly Furnished For Rent 2024-03-21 31 Al Raha Beach Abu Dhabi 24.485931 54.600939
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73742 entries, 0 to 73741
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Address                 73742 non-null  object 
 1   Rent                    73742 non-null  int64  
 2   Beds                    73742 non-null  int64  
 3   Baths                   73742 non-null  int64  
 4   Type                    73742 non-null  object 
 5   Area_in_sqft            73742 non-null  int64  
 6   Rent_per_sqft           73742 non-null  float64
 7   Rent_category           73742 non-null  object 
 8   Frequency               73742 non-null  object 
 9   Furnishing              73742 non-null  object 
 10  Purpose                 73742 non-null  object 
 11  Posted_date             73742 non-null  object 
 12  Age_of_listing_in_days  73742 non-null  int64  
 13  Location                73742 non-null  object 
 14  City                    73742 non-null  object 
 15  Latitude                73023 non-null  float64
 16  Longitude               73023 non-null  float64
dtypes: float64(3), int64(5), object(9)
memory usage: 9.6+ MB
In [5]:
df["Posted_date"] = pd.to_datetime(df["Posted_date"])
In [6]:
numerical_columns = [co for co in df.columns if df[co].dtype != 'object']
categorical_columns = [col for col in df.columns if df[col].dtype == 'object']
print("Numerical Columns : ",numerical_columns)
print("--------------------------------------------")
print("categorical Columns : ",categorical_columns)
Numerical Columns :  ['Rent', 'Beds', 'Baths', 'Area_in_sqft', 'Rent_per_sqft', 'Posted_date', 'Age_of_listing_in_days', 'Latitude', 'Longitude']
--------------------------------------------
categorical Columns :  ['Address', 'Type', 'Rent_category', 'Frequency', 'Furnishing', 'Purpose', 'Location', 'City']
In [7]:
df.isna().sum()
Out[7]:
Address                     0
Rent                        0
Beds                        0
Baths                       0
Type                        0
Area_in_sqft                0
Rent_per_sqft               0
Rent_category               0
Frequency                   0
Furnishing                  0
Purpose                     0
Posted_date                 0
Age_of_listing_in_days      0
Location                    0
City                        0
Latitude                  719
Longitude                 719
dtype: int64
In [8]:
df =df.dropna()
In [9]:
for i in categorical_columns:
    print(df[i].unique())
    print("-------------------------------------------------------------------------")
['The Gate Tower 2, The Gate Tower, Shams Gate District, Shams Abu Dhabi, Al Reem Island, Abu Dhabi'
 "Water's Edge, Yas Island, Abu Dhabi"
 'Al Raha Lofts, Al Raha Beach, Abu Dhabi' ...
 'Umm Al Quwain Marina, Umm Al Quwain'
 'Al Humrah B, Al Humrah, Umm Al Quwain'
 'Al Huboob 1, Al Salamah, Umm Al Quwain']
-------------------------------------------------------------------------
['Apartment' 'Penthouse' 'Villa' 'Townhouse' 'Villa Compound'
 'Residential Building' 'Residential Floor' 'Hotel Apartment'
 'Residential Plot']
-------------------------------------------------------------------------
['Medium' 'High' 'Low']
-------------------------------------------------------------------------
['Yearly']
-------------------------------------------------------------------------
['Unfurnished' 'Furnished']
-------------------------------------------------------------------------
['For Rent']
-------------------------------------------------------------------------
['Al Reem Island' 'Yas Island' 'Al Raha Beach' 'Al Bateen' 'Al Reef'
 'The Marina' 'Al Khalidiyah' 'Al Raha Gardens' 'Mohammed Bin Zayed City'
 'Shakhbout City' 'Madinat Al Riyadh' 'Corniche Area' 'Al Muroor'
 'Zayed Sports City' 'Khalifa City' 'Hamdan Street' 'Al Bahia'
 'Masdar City' 'Al Najda Street' 'Tourist Club Area (TCA)' 'Al Matar'
 'Al Shamkha' 'Rawdhat Abu Dhabi' 'Al Ghadeer' 'Danet Abu Dhabi' 'Baniyas'
 'Al Mushrif' 'Airport Street' 'Al Jubail Island' 'Al Hosn'
 'Saadiyat Island' 'Corniche Road' 'Sheikh Khalifa Bin Zayed Street'
 'Electra Street' 'Al Falah Street' 'Al Wahdah' 'Madinat Zayed' 'Rabdan'
 'Al Rahba' 'Al Maqtaa' 'Capital Centre' 'Al Karamah' 'Al Markaziya'
 'Defence Street' 'Al Nahyan' 'Between Two Bridges (Bain Al Jessrain)'
 'Al Nasr Street' 'Al Zahraa' 'Al Muntazah' 'Al Zaab' 'Al Samha'
 'Al Shawamekh' 'Hydra Village' 'Sas Al Nakhl Village' 'Al Zahiyah'
 'Al Rawdah' 'Sheikh Rashid Bin Saeed Street' 'Al Maryah Island'
 'Al Manhal' 'Zayed City' 'Al Aman' 'Al Shahama' 'Mussafah' 'Al Khubeirah'
 'Al Mina' 'Al Falah City' 'Al Salam Street' 'Al Qurm' 'Al Ras Al Akhdar'
 'Al Danah' 'Al Dhafrah' 'KIZAD' 'Al Wathba' 'Liwa Street' 'Muwaylih'
 'Al Nahda' 'Al Yasmeen' 'Al Rawda' 'Al Zahya' 'Al Nuaimiya' 'Al Mowaihat'
 'Al Sawan' 'Al Helio' 'Al Alia' 'Corniche Ajman' 'Ajman Downtown'
 'Al Rashidiya' 'Al Nakhil' 'Al Jurf' 'Ajman Industrial' 'Al Hamidiyah'
 'Garden City' 'Masfoot' 'Musherief' 'Al Zorah' 'Al Bustan'
 'Emirates City' 'Al Rumaila' 'Sheikh Maktoum Bin Rashid Street'
 'Ajman Free Zone' 'Al Tallah 2' 'Al Tallah 1' 'Al Ameera Village'
 'Asharij' 'Al Marakhaniya' 'Shiab Al Ashkhar' 'Al Sarouj' 'Al Jimi'
 'Al Tiwayya' 'Zakhir' 'Al Maqam' 'Al Jahili' 'Al Khibeesi' 'Al Hayer'
 'Al Iqabiyyah' 'Central District' 'Hili' 'Al Rawdah Al Sharqiyah'
 'Al Muwaiji' 'Al Yahar' 'Falaj Hazzaa' 'Al Mutarad' 'Al Sidrah' 'Neima'
 "Al Mutaw'ah" 'Al Dhahir' 'Al Qattara' 'Um Ghafah' "Al Fou'ah"
 'Abu Samrah' 'Al Masoudi' 'Ghnaymah' 'Jumeirah Village Circle (JVC)'
 'Dubai Hills Estate' 'Arabian Ranches 2' 'Dubai Silicon Oasis (DSO)'
 'Dubai Sports City' 'Town Square' 'Meydan City' 'Dubai Creek Harbour'
 'Dubai Harbour' 'Jumeirah Beach Residence (JBR)' 'Palm Jumeirah' 'Mirdif'
 'DAMAC Hills 2 (Akoya by DAMAC)' 'Al Jaddaf' 'Dubailand'
 'Jumeirah Golf Estates' 'Dubai South' 'Dubai Marina' 'Al Furjan'
 'The Valley' 'Downtown Dubai' 'Arjan' 'Ras Al Khor' 'Reem' 'DAMAC Hills'
 'Umm Suqeim' 'Jumeirah Heights' 'Mudon' 'Business Bay'
 'Jumeirah Lake Towers (JLT)' 'Tilal Al Ghaf' "Za'abeel"
 'Arabian Ranches 3' 'Motor City' 'The Views' 'The Meadows' 'Al Wasl'
 'Jumeirah' 'Sheikh Zayed Road' 'Arabian Ranches' 'Jumeirah Park'
 'Dubai Residence Complex' 'The Springs' 'Mohammed Bin Rashid City'
 'Serena' 'Dubai Production City (IMPZ)' 'Nad Al Sheba' 'The Greens'
 'Sobha Hartland' 'Bur Dubai' 'Dubai Studio City' 'Green Community'
 'Jumeirah Islands' 'Jumeirah Village Triangle (JVT)' 'The Villa'
 'Al Barari' 'Al Barsha' 'Al Nahda (Dubai)' 'DIFC' 'Liwan'
 'Living Legends' 'Discovery Gardens' 'Barsha Heights (Tecom)' 'Al Karama'
 'Remraam' 'Bluewaters Island' 'Dubai Media City' 'International City'
 'The Lakes' 'City of Arabia' 'World Trade Centre' 'Bukadra'
 'Dubai Festival City' 'Culture Village' 'Deira' 'Falcon City of Wonders'
 'Al Safa' 'Majan' 'Liwan 2' 'Al Quoz' 'The Sustainable City'
 'Umm Al Sheif' 'Jebel Ali' 'Nad Al Hamar' 'Al Warqaa' 'Al Qusais'
 'Al Awir' 'Al Satwa' 'The Hills' 'Muhaisnah' 'Al Khawaneej' 'The Gardens'
 'Expo City' 'Dubai Investment Park (DIP)' 'Dubai Waterfront'
 'Dubai Industrial City' 'Dubai Internet City' 'Al Sufouh'
 'Dubai Maritime City' 'Emirates Hills' 'Al Warsan' 'Al Badaa' 'Al Mizhar'
 'Al Jafiliya' 'Al Garhoud' 'Wasl Gate' 'Al Mamzar' 'Wadi Al Shabak'
 'Al Hudaiba' 'Pearl Jumeirah' 'Al Manara' 'Wadi Al Safa 2' 'Al Twar'
 'Hadaeq Sheikh Mohammed Bin Rashid' 'Knowledge Village' 'Umm Ramool'
 'Al Hebiah 2' 'Al Lisaili' 'Oud Al Muteena' 'Fujairah Tower'
 'Fujairah Freezone' 'Dibba' 'Tawyeen' 'Mirbah'
 'Address Fujairah Beach Resort' 'Sakamkam' 'Al Marjan Island'
 'Al Hamra Village' 'Al Nakheel' 'Mina Al Arab' 'Al Qusaidat'
 'The Cove Rotana Resort' 'Dafan Al Nakheel' 'Rak City' 'Al Seer'
 'Yasmin Village' 'Al Mairid' 'Al Dhait' 'Khuzam' 'Dafan Al Khor'
 'Seih Al Uraibi' 'Sidroh' 'Al Uraibi' 'Wadi Ammar' 'Dahan' 'Al Ghubb'
 'Al Nudood' 'Al Kharran' 'Julfar' 'Al Sharisha' 'Al Rams' 'Aljada'
 'Al Khan' 'Muwaileh' 'Al Nahda (Sharjah)' 'Al Tai' 'Al Taawun'
 'Muwailih Commercial' 'Al Majaz' 'Al Wahda Street' 'Industrial Area'
 'Al Qasimia' 'Tilal City' 'Al Rahmaniya' 'Sharqan' 'Al Qasba' 'Al Fisht'
 'Abu Shagara' 'Al Mujarrah' 'Barashi' 'Al Mareija' 'Al Dhaid'
 'Al Ramaqiya' 'Al Nabba' 'Al Sharq' 'Al Nasserya' 'Rolla Area'
 'Al Ghuwair' 'Al Ramtha' 'Um Tarafa' 'Bu Tina' 'Al Mahatah' 'Al Musalla'
 'Al Soor' 'Al Ramla' 'Al Falaj' 'Al Jazzat' 'Maysaloon' 'Al Nekhailat'
 'Al Mansoura' 'Hoshi' 'Al Abar' 'Al Fayha' 'Samnan' 'Al Yarmook'
 'Al Jubail' 'Al Noaf' 'Al Sajaa' 'Al Shahba' 'Al Manakh' 'Khor Fakkan'
 'Al Ghafia' 'Al Sabkha' 'Kalba' 'Dasman' 'Al Darari' 'Al Juraina'
 'Sharjah University City' 'Al Gharb' 'Al Sajaa Industrial' 'Al Tay East'
 'Al Mirgab' 'Al Riqaibah' 'Al Rifa' 'Al Ghubaiba' 'Al Riqqa Suburb'
 'Al Yash' 'Al Bataeh' 'Al Ramlah' 'Al Butain' 'Al Salamah' 'Al Abraq 1'
 'Al Qarayen' 'Old Town Area' 'Umm Al Quwain Marina' 'Al Hawiyah'
 'Al Humrah']
-------------------------------------------------------------------------
['Abu Dhabi' 'Ajman' 'Al Ain' 'Dubai' 'Fujairah' 'Ras Al Khaimah'
 'Sharjah' 'Umm Al Quwain']
-------------------------------------------------------------------------
In [10]:
df.describe()
Out[10]:
Rent Beds Baths Area_in_sqft Rent_per_sqft Posted_date Age_of_listing_in_days Latitude Longitude
count 7.302300e+04 73023.000000 73023.000000 73023.000000 73023.000000 73023 73023.000000 73023.000000 73023.000000
mean 1.483723e+05 2.154458 2.638771 2035.634471 88.537296 2024-02-07 02:45:58.900620544 73.884735 24.918929 55.053133
min 0.000000e+00 0.000000 1.000000 74.000000 0.000000 2018-01-27 00:00:00 11.000000 15.175847 43.351928
25% 5.499900e+04 1.000000 2.000000 850.000000 40.000000 2024-01-17 00:00:00 30.000000 24.493598 54.607372
50% 9.800000e+04 2.000000 2.000000 1329.000000 71.813285 2024-03-01 00:00:00 51.000000 25.078641 55.238209
75% 1.700000e+05 3.000000 3.000000 2101.000000 119.047619 2024-03-22 00:00:00 95.000000 25.197978 55.367138
max 5.500000e+07 12.000000 11.000000 210254.000000 2182.044888 2024-04-10 00:00:00 2276.000000 25.920310 56.361294
std 3.082652e+05 1.571260 1.620881 2976.159891 66.627532 NaN 71.837749 0.569356 0.653722
In [11]:
vacant_buildings = df["Type"][df["Rent"]<=1].count()
print("Count of Building =",df["Rent"].count())
print("Count of Occupied Building =",len(df)-vacant_buildings,"percentage  % =",round((len(df)-vacant_buildings)*100/len(df),3))
print("Count of Vacant Building =",vacant_buildings,"percentage  % =",round(vacant_buildings*100/len(df),3))
Count of Building = 73023
Count of Occupied Building = 73006 percentage  % = 99.977
Count of Vacant Building = 17 percentage  % = 0.023
In [12]:
df_dummies =df.drop(categorical_columns,axis=1)
plt.figure(figsize=(7,5))
sns.heatmap(df_dummies.corr(),annot=True,fmt="0.2f")
Out[12]:
<Axes: >
No description has been provided for this image
In [13]:
category = df.groupby(df["City"])["Type"].count().sort_values()
px.bar(category,y="Type",color="Type",title ="Comparing rental orders across different cities",
       labels={'Type': 'Order'},width=1000,height=500)
In [14]:
px.scatter_mapbox(df,lat="Latitude", lon="Longitude",title='Rental Properties Locations in UAE',
                  mapbox_style="open-street-map",zoom=6, height=600)
In [15]:
category = df.groupby("City")["Rent"].mean().reset_index()
category.columns = ['City', 'Rent_mean']
px.bar(category,x="City",y="Rent_mean",color="Rent_mean",title ="Comparing rental prices across different cities",width=1000,height=500)
In [16]:
category = df[["City","Furnishing"]].groupby("City").value_counts().reset_index()
category.columns=["City","Furnishing","Count"]
px.bar(category,x="City",y="Count",color="Furnishing",title ="Number of furnished and unfurnished rentals per country",width=1000,height=500)
In [17]:
category = df.groupby(df["Type"])["Rent"].count().sort_values()
px.bar(category,y="Rent",color="Rent",title ="Comparing rental orders across different property types",
       labels={'Rent': 'Order'},width=1000,height=500)
In [18]:
category = df.groupby(["Type","Rent_category"])["Rent"].count().reset_index()
px.bar(category,x="Type" ,y="Rent",color="Rent_category",title='Rent Category Distribution by Property Type',width=1000,height=500)
In [19]:
plt.figure(figsize=(20,20))
px.histogram(data_frame=df,x="Furnishing",y="Rent",color="Furnishing",title="Compare property rents by furnishings")
<Figure size 2000x2000 with 0 Axes>
In [20]:
px.pie(df,values="Rent",names="Rent_category",width=1000,height=500)
In [21]:
px.pie(df,values="Rent_per_sqft",names="Rent_category",width=1000,height=500)
In [22]:
px.violin(data_frame=df,x="Rent_category",y="Area_in_sqft",color="Rent_category",title="Relationship between rent category and area")
In [23]:
avg_df = df.groupby(df["Posted_date"].dt.to_period("M"))["Rent"].mean().reset_index()
avg_df["Posted_date"]=avg_df["Posted_date"].astype(str)
px.line(avg_df, x='Posted_date', y='Rent',title='Average Monthly Rent Prices Over Time',
        labels={'Posted_date': 'Month', 'Rent': 'Average Rent'},width=1100,height=500)
In [24]:
px.scatter(data_frame=df,x="Area_in_sqft",y="Rent",title="Relationship between rent and area")
In [25]:
px.histogram(data_frame=df,x="Beds",y="Rent",title="Relationship between rent and beds")
In [26]:
px.histogram(data_frame=df,x="Baths",y="Rent",title="Relationship between rent and baths")
In [28]:
px.area(df,x="Beds",y="Baths")
  • Top 5 Best and Worst Rental Locations
In [40]:
top_5_best = df.groupby("Location")["Rent"].sum().sort_values().reset_index().head(5)
px.bar(top_5_best,x="Location",y="Rent",title ="Top 5 Best Rental Locations",width=1000,height=500)
In [42]:
top_5_worst = df.groupby("Location")["Rent"].sum().sort_values().reset_index().tail(5)
px.bar(top_5_worst,x="Location",y="Rent",title ="Top 5 Worst Rental Locations",width=1000,height=500)